*************************************************************************************************************************************************
* This replication file estimates demand expansion due to entry, as well as fixed cost and lambda bounds when transportation costs are 75 Euro. *
*************************************************************************************************************************************************
*1. Predict output levels based on the demand model and load estimated market expansion measures.
*2. Calculate fixed cost bounds based on presumed values of lambda (0 and 1).

*************************
* 0 Set path to files *
************************* 
clear all
capture log close 
set more off
set trace off
mata: mata set matastrict off
cd $main_directory

****************************************************************************************************
* 1. Predict output levels based on the demand model and load estimated market expansion measures. * 
****************************************************************************************************

use "Data_Belgium\Generated_data\baseline_demand_dataset.dta", clear
qui do "Command_files\spatial_demand_Q.do"
global transportation_estimate=75

//Set predetermined values:
sca scale=10
mata: mata matuse "Estimates\betas_Q", replace
mata: mata matuse "Estimates\betas_Q1", replace
mata: mata matuse "Estimates\betas_Q2", replace
mata: mata matuse "Estimates\cost_price_parameters_t`=$transportation_estimate'", replace
mata:    st_numscalar("p_Q1",p_Q1)
mata:    st_numscalar("p_Q2",p_Q2)
mata:    st_numscalar("alpha",alpha)
mata:    st_numscalar("nu",nu)
mata:    st_numscalar("aL",aL)
mata:    st_numscalar("phi",phi)
mata:    st_numscalar("aM",aM)
mata:    st_numscalar("fixed_cost",fixed_cost)
//Set predetermined values 
set seed 1234
global select_lambda `" 1"'
sca lambda=$select_lambda 

gen association=(NotPerOff>1)
label define astatus 0 "Single notary" 1 "Association"
label value association astatus

//Calculate the total population on the market
bysort market_ID:  gen dup_market = cond(_N==1,0,_n)
qui su population if dup_market<2
sca sumPopulation=r(sum)
sca di sumPopulation

//Generate a unique numeric identifier for the market.
sort market_ID
egen market_ID_num = group(market_ID)

//Calculate the total demand on the market
qui su Q1 if common==1 
sca sumQ1=r(sum)
sca di sumQ1

//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma_Q1=scale*sumQ1/sumPopulation
sca di gamma_Q1
//Calculate the market size (i.e. number of possible transactions)
gen market_size_Q1=gamma_Q1*population

//Define the product for which demand will be estimated (in this case total transactions).
gen firm_demand_Q1=Q1 
drop if firm_demand_Q1==.
drop if firm_demand_Q1<=0

// Controls: 
global controls_utility distance lnNotPerOff start_not LDE Perc_Young Perc_Elderly log_income popdens
global controls_total_Q1 firm_ID firm_demand_Q1 market_ID_num market_size_Q1 $controls_utility

*************************
* STATUS QUO PREDICTION *
*************************
//We begin with predictions of the demand and consumer surplus, since they require the full dataset (all market-notary combinations).
//Demand prediction Q1_IV
sort market_ID
mata:
 X    = st_data(., "$controls_total_Q1") //This selects the corresponding control variables.
    nx    = rows(X)
    X    = X,J(nx,1,1)
	Results=spdemand_log(X,betas_Q1)
	n_firms=rows(Results)
    st_numscalar("n_firms",n_firms)
	firm_ID    = st_data(., "firm_ID") 
	Prediction=J(nx,1,.)
	for(i=1; i<=nx; i++) { //i goes over the market-notary combinations
    for(j=1; j<=n_firms; j++) {
	if (firm_ID[i]==Results[j,1]) {
		Prediction[i]=Results[j,2]
	}
	}
	}
prediction_exp=exp(Prediction)
st_addvar("double","Q1_IV")
st_store(.,"Q1_IV",prediction_exp)
end


//Consumer surplus prediction CS_total_N
sort market_ID
mata:
 X    = st_data(., "$controls_total_Q1") //This selects the corresponding control variables.
    nx    = rows(X)
    X    = X,J(nx,1,1)
   CS_matrix=consumer_surplus(X,betas_Q1,transportation_cost)
   CS_total_Q1=sum(CS_matrix[,2])
   st_numscalar("CS_total_Q1_current", CS_total_Q1)
end
di CS_total_Q1_current
gen CS_total_Q1_current=lambda*CS_total_Q1_current

//Demand prediction Q2_IV

//Calculate the total demand on the market
qui su Q2 if common==1
sca sumQ2=r(sum)
sca di sumQ2

//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma_Q2=scale*sumQ2/sumPopulation
sca di gamma_Q2
//Calculate the market size (i.e. number of possible transactions)
gen market_size_Q2=gamma_Q2*population

//Define the product for which demand will be estimated (in this case total transactions).
gen firm_demand_Q2=Q2 
drop if firm_demand_Q2==.
drop if firm_demand_Q2<=0

//Define the values inside the X matrix.
global controls_total_Q2 firm_ID firm_demand_Q2 market_ID_num market_size_Q2 $controls_utility

sort market_ID
mata:
 X    = st_data(., "$controls_total_Q2") //This selects the corresponding control variables.
    nx    = rows(X)
    X    = X,J(nx,1,1)
	Results=spdemand_log(X,betas_Q2)
	n_firms=rows(Results)
    st_numscalar("n_firms",n_firms)
	firm_ID    = st_data(., "firm_ID") 
	Prediction=J(nx,1,.)
	for(i=1; i<=nx; i++) { //i goes over the market-notary combinations
    for(j=1; j<=n_firms; j++) {
	if (firm_ID[i]==Results[j,1]) {
		Prediction[i]=Results[j,2]
	}
	}
	}
prediction_exp=exp(Prediction)
st_addvar("double","Q2_IV")
st_store(.,"Q2_IV",prediction_exp)
end


//Consumer surplus prediction CS_total_N
sort market_ID
mata:
 X    = st_data(., "$controls_total_Q2") //This selects the corresponding control variables.
    nx    = rows(X)
    X    = X,J(nx,1,1)
   CS_matrix=consumer_surplus(X,betas_Q2,transportation_cost)
   CS_total_Q2=sum(CS_matrix[,2])
   st_numscalar("CS_total_Q2_current", CS_total_Q2)
end
gen CS_total_Q2_current=lambda*CS_total_Q2_current
gen CS_total_current=CS_total_Q2_current+CS_total_Q1_current

//We then calculate costs and profits (this requires only 1 observation per notary).
keep if common==1
sort firm_ID
gen L2_Q=exp(-aL)*(alpha*Q1_IV+(1-alpha)*Q2_IV)^(1/nu)
gen M2_Q=exp(-aM/phi)*(alpha*Q1_IV+(1-alpha)*Q2_IV)^(1/(nu*phi))
su L2_Q
gen L2_industry_current=r(sum)

su M2_Q
gen M2_industry_current=r(sum)

*variable cost and marginal cost functions
gen VC_Q=L2_Q+M2_Q
su VC_Q
gen VC_industry_current=r(sum)

*Output levels
su Q1_IV
gen Q1_industry_current=r(sum)
su Q2_IV
gen Q2_industry_current=r(sum)
gen Q_IV=Q1_IV+Q2_IV
su Q_IV
gen Q_industry_current=r(sum)
gen sales_per_notary=Q_IV/NotPerOff
su sales_per_notary
gen Q_average_predicted_per_notary=r(mean)
gen sales_per_notary_Q1=Q1_IV/NotPerOff
gen sales_per_notary_Q2=Q2_IV/NotPerOff
su sales_per_notary_Q1
gen Q1_average_predicted_per_notary=r(mean)
su sales_per_notary_Q2
gen Q2_average_predicted_per_notary=r(mean)
su NotPerOff
sca notaries=r(sum) //Calculate the total number of notaries

*Variable profits at the status quo
gen v_profit_individual_n=p_Q1*Q1_IV+p_Q2*Q2_IV-VC_Q
su v_profit_individual_n 
gen v_profit_per_notary=v_profit_individual_n/NotPerOff
su v_profit_per_notary

*Industry profitability
gen revenue_individual_current=p_Q1*Q1_IV+p_Q2*Q2_IV
su revenue_individual_current
gen revenue_industry_current=r(sum)
gen v_profit_industry_current=p_Q1*Q1_industry_current+p_Q2*Q2_industry_current-VC_industry_current
gen profit_industry_current=v_profit_industry_current-fixed_cost*notaries
*This is the welfare without fixed costs
gen welfare_no_f_current=v_profit_industry_current+CS_total_current //Note that here already we have multiplied by lambda!

*******************************************************************************
*2. Calculate fixed cost bounds based on presumed values of lambda (0 and 1). *
*******************************************************************************

//Counterfactual output (calculated in the previous 2 stata files)
mata: mata matuse "Data_Belgium\Generated_data\sales_Q1_add_t`=$transportation_estimate'", replace
mata: mata matuse "Data_Belgium\Generated_data\sales_Q2_add_t`=$transportation_estimate'", replace
mata: mata matuse "Data_Belgium\Generated_data\sales_Q1_remove_t`=$transportation_estimate'", replace
mata: mata matuse "Data_Belgium\Generated_data\sales_Q2_remove_t`=$transportation_estimate'", replace

//Calculate market expansion and business stealing

//Collect the information on changes in output due to the removal of a notary from each office.
mata:
Q1_own_remove=diagonal(sales_Q1_remove) //The diagonal elements of sales_Q1_remove represent the sales of an office when 1 notary is removed from it.
Q2_own_remove=diagonal(sales_Q2_remove)
Q_own_remove=Q1_own_remove+Q2_own_remove
Q1_industry_remove=rowsum(sales_Q1_remove) //The row-sum of sales_Q1_remove represents the sales of all offices when 1 notary is removed from a given office.
Q2_industry_remove=rowsum(sales_Q2_remove)
sum(Q1_own_remove:==0)
Q_industry_remove=rowsum(sales_Q1_remove)+rowsum(sales_Q2_remove)
st_addvar("double","Q_own_remove")
st_store(.,"Q_own_remove",Q_own_remove)
st_addvar("double","Q1_own_remove")
st_store(.,"Q1_own_remove",Q1_own_remove)
st_addvar("double","Q2_own_remove")
st_store(.,"Q2_own_remove",Q2_own_remove)
st_addvar("double","Q_industry_remove")
st_store(.,"Q_industry_remove",Q_industry_remove)
st_addvar("double","Q1_industry_remove")
st_store(.,"Q1_industry_remove",Q1_industry_remove)
st_addvar("double","Q2_industry_remove")
st_store(.,"Q2_industry_remove",Q2_industry_remove)
end
replace Q1_own_remove=0 if NotPerOff==1 //The values of output are missing if there is only one notary in the office, since if it is removed, then the notary office is no longer open. Therefore sales are expected to be 0.
replace Q2_own_remove=0 if NotPerOff==1


//Collect the information on changes in output due to the addition of a notary to each office.
mata:
Q1_own_add=diagonal(sales_Q1_add)
Q2_own_add=diagonal(sales_Q2_add)
Q_own_add=Q1_own_add+Q2_own_add
Q_industry_add=rowsum(sales_Q1_add)+rowsum(sales_Q2_add)
st_addvar("double","Q_own_add")
st_store(.,"Q_own_add",Q_own_add)
st_addvar("double","Q1_own_add")
st_store(.,"Q1_own_add",Q1_own_add)
st_addvar("double","Q2_own_add")
st_store(.,"Q2_own_add",Q2_own_add)
st_addvar("double","Q_industry_add")
st_store(.,"Q_industry_add",Q_industry_add)
end
//Individual variable profit at the status quo
su v_profit_individual_n
//Individual variable profit after removal of a notary
gen v_profit_individual_n_1=p_Q1*Q1_own_remove+p_Q2*Q2_own_remove-(exp(-aL)*(alpha*Q1_own_remove+(1-alpha)*Q2_own_remove)^(1/nu)+exp(-aM/phi)*(alpha*Q1_own_remove+(1-alpha)*Q2_own_remove)^(1/(nu*phi)))
replace v_profit_individual_n_1=0 if NotPerOff==1
//Calculate the change in variable profits due to the entry of the marginal notary in an office.
gen change_v_profit_individual_n=v_profit_individual_n-v_profit_individual_n_1
//Individual variable profit after addition of a notary
gen v_profit_individual_n1=p_Q1*Q1_own_add+p_Q2*Q2_own_add-(exp(-aL)*(alpha*Q1_own_add+(1-alpha)*Q2_own_add)^(1/nu)+exp(-aM/phi)*(alpha*Q1_own_add+(1-alpha)*Q2_own_add)^(1/(nu*phi)))
gen change_v_profit_individual_n1=v_profit_individual_n1-v_profit_individual_n

//Total output
replace Q_own_remove=0 if Q_own_remove==.
//Calculate the additional sales due to the entry of the marginal notary
gen change_Q_own=Q_IV-Q_own_remove
su change_Q_own if association==0
gen change_single_notary=r(mean)
su change_Q_own if association==1
gen change_association=r(mean)

//Generate the change in sales for the entire industry
gen change_Q_total=Q_industry_current-Q_industry_remove
gen change_Q_perc=change_Q_total/Q_average_predicted_per_notary
//Calculate the sales by other notary offices
gen Q_others_current=Q_industry_current-Q_IV //Status quo
gen Q_others_remove=Q_industry_remove-Q_own_remove //Counterfactual
//Calculate the change in the sales of other firms due to entry in the specific notary office (i.e. business stealing)
gen change_Q_others=Q_others_current-Q_others_remove
//Calculate this as a percentage of average notary sales
gen diversion_ratio=change_Q_others/change_single_notary if association==0
replace diversion_ratio=change_Q_others/change_association if association==1

//Calculate the variable profits for alternative levels of sales. Here it is very important to sort by the firm ID to ensure correct merging of the mata data.
sort firm_ID
mata:
//Calculate labor costs for the industry in the case of addition or removal
LC_industry_add=rowsum(J(1152,1152,exp(-aL)):*(alpha:*sales_Q1_add:+(1-alpha):*sales_Q2_add):^(1/nu))
LC_industry_remove=rowsum(J(1152,1152,exp(-aL)):*(alpha:*sales_Q1_remove:+(1-alpha):*sales_Q2_remove):^(1/nu))
//Calculate intermediate input costs for the industry in the case of addition or removal
IntC_industry_add=rowsum(exp(J(1152,1152,(-aM/phi))):*(alpha:*sales_Q1_add:+(1-alpha):*sales_Q2_add):^(1/(nu*phi)))
IntC_industry_remove=rowsum(exp(J(1152,1152,(-aM/phi))):*(alpha:*sales_Q1_remove:+(1-alpha):*sales_Q2_remove):^(1/(nu*phi)))
//Calculate total industry costs in the case of addition or removal of notaries
VC_industry_add=LC_industry_add+IntC_industry_add
VC_industry_remove=LC_industry_remove+IntC_industry_remove
st_addvar("double","VC_industry_add")
st_store(.,"VC_industry_add",VC_industry_add)
st_addvar("double","VC_industry_remove")
st_store(.,"VC_industry_remove",VC_industry_remove)
//Revenue and variable profit for one product
Q1_industry_add=rowsum(sales_Q1_add)
Q1_industry_remove=rowsum(sales_Q1_remove)
Q2_industry_add=rowsum(sales_Q2_add)
Q2_industry_remove=rowsum(sales_Q2_remove)
R_industry_add=p_Q1*Q1_industry_add+p_Q2*Q2_industry_add
R_industry_remove=p_Q1*Q1_industry_remove+p_Q2*Q2_industry_remove
v_profit_industry_add=R_industry_add-VC_industry_add
v_profit_industry_remove=R_industry_remove-VC_industry_remove
st_addvar("double","v_profit_industry_add")
st_store(.,"v_profit_industry_add",v_profit_industry_add)
st_addvar("double","v_profit_industry_remove")
st_store(.,"v_profit_industry_remove",v_profit_industry_remove)
end

//Load the counterfactual surplus which was calculated in the previous file
mata: mata matuse "Data_Belgium\Generated_data\CS_total_Q1_remove_t`=$transportation_estimate'", replace
mata: mata matuse "Data_Belgium\Generated_data\CS_total_Q1_add_t`=$transportation_estimate'", replace
mata: mata matuse "Data_Belgium\Generated_data\CS_total_Q2_remove_t`=$transportation_estimate'", replace
mata: mata matuse "Data_Belgium\Generated_data\CS_total_Q2_add_t`=$transportation_estimate'", replace
mata: CS_total_add=CS_total_Q1_add+CS_total_Q2_add
mata: CS_total_remove=CS_total_Q1_remove+CS_total_Q2_remove

sort firm_ID
mata:
st_addvar("double","CS_total_add")
st_store(.,"CS_total_add",CS_total_add)
st_addvar("double","CS_total_remove")
st_store(.,"CS_total_remove",CS_total_remove)
end
//Calculate average variable profits and consumer surplus from adding and removing a notary (these generate the lower and upper bound of the fixed costs)
gen V_add=v_profit_industry_add-v_profit_industry_current
su V_add
sca meanV_add=r(mean)
gen C_add=CS_total_add-CS_total_current
su C_add
sca meanC_add=r(mean)
gen V_remove=v_profit_industry_current-v_profit_industry_remove
su V_remove  
sca meanV_remove=r(mean)
gen C_remove=CS_total_current-CS_total_remove
su C_remove
sca meanC_remove=r(mean)

************************************
* FIXED COST BOUNDS WITH BOOTSTRAP *
************************************
//Calculate fixed cost bounds
gen f_lower_add_welfare=lambda*meanC_add+meanV_add
su f_lower_add_welfare
sca f_lower_add_welfare=r(mean)
gen f_upper_remove_welfare=lambda*meanC_remove+meanV_remove
su f_upper_remove_welfare 
sca f_upper_remove_welfare=r(mean)
gen f_lower_add_industry=meanV_add
su f_lower_add_industry
sca f_lower_add_industry=r(mean)
gen f_upper_remove_industry=meanV_remove
su f_upper_remove_industry 
sca f_upper_remove_industry=r(mean)

//Calculate 95% confidence intervals
gen b_id=_n //Make a bootstrap identifier
qui su b_id
sca nobs=r(max) //This counts how many observations should be in each sample. We use the number of observations in the current dataset as the chosen sample size.
sca B=1000 //Select the number of bootstrap draws - we are taking a 1000 samples with the number of observations in each sample being equal to the number of observations in the dataset. 
set matsize 5000 //This just allows us to store large matrices in stata
set seed 1234
matrix b_f_remove_welfare = J(B,1,.) //This is a column vector in which we will store the mean f remove estimates, which will serve to identify the upper bound of the fixed costs under welfare maximization.
matrix b_f_add_welfare = J(B,1,.) //This is a column vector in which we will store the mean f add estimates, which will serve to identify the lower bound of the fixed costs under welfare maximization.
matrix b_f_remove_industry = J(B,1,.) //This is a column vector in which we will store the mean f remove estimates, which will serve to identify the upper bound of the fixed costs under joint profit maximization.
matrix b_f_add_industry = J(B,1,.) //This is a column vector in which we will store the mean f add estimates, which will serve to identify the lower bound of the fixed costs under joint profit maximization.

forvalues b= 1/1000 {
//Take random draws of notary office ids - this will be the sample for the bootstrap
if `b'> 1 {
drop randnum
}
gen randnum = runiformint(1,nobs) //We draw a random integer between 1 and the number of observations
//Make empty matrices to store the values of profits and consumer surplus for each observation
matrix V_remove_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of V_remove (variable profits from removal)
matrix C_remove_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of C_remove (consumer surplus from removal)
matrix V_add_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of V_add (variable profits from addition)
matrix C_add_mat = J(nobs,1,.) //Make an empty matrix to store the random draws of C_add (consumer surplus from addition)
//Go over the ids in the sample and store the relevant information
forvalues i= 1/`=nobs' {
  mat V_remove_mat[`i',1]=V_remove[randnum[`i']] //This stores the variable profits from removal from the observation in the original sample which corresponds to the random number that has been drawn.
  mat C_remove_mat[`i',1]=C_remove[randnum[`i']] //A similar approach is taken for the rest of the relevant information
  mat V_add_mat[`i',1]=V_add[randnum[`i']]
  mat C_add_mat[`i',1]=C_add[randnum[`i']]
}
//Calculate the ratio of sums for the draw and store it
mata : st_matrix("mean_V_remove", colsum(st_matrix("V_remove_mat")) :/ colnonmissing(st_matrix("V_remove_mat"))) //The sum of the variable profits is divided by the number of non-missing cells in the variable profit vector, i.e. the number of observations.
mata : st_matrix("mean_C_remove", colsum(st_matrix("C_remove_mat")) :/ colnonmissing(st_matrix("C_remove_mat")))
mat b_f_remove_welfare[`b',1]=lambda*mean_C_remove[1,1]+mean_V_remove[1,1]
mat b_f_remove_industry[`b',1]=mean_V_remove[1,1]

mata : st_matrix("mean_V_add", colsum(st_matrix("V_add_mat")) :/ colnonmissing(st_matrix("V_add_mat")))
mata : st_matrix("mean_C_add", colsum(st_matrix("C_add_mat")) :/ colnonmissing(st_matrix("C_add_mat")))
mat b_f_add_welfare[`b',1]=lambda*mean_C_add[1,1]+mean_V_add[1,1]
mat b_f_add_industry[`b',1]=mean_V_add[1,1]
}
drop b_id randnum

//Make a matrix with the upper and lower bounds of lambda
mat f_bootstrap = b_f_remove_welfare,b_f_add_welfare,b_f_remove_industry,b_f_add_industry
matrix colnames f_bootstrap = b_f_remove_welfare b_f_add_welfare b_f_remove_industry b_f_add_industry
//Export the matrix into a new dataset of bootstrap iteration means
preserve
clear
svmat f_bootstrap, names(col)
//Calculate confidence intervals for fixed cost bounds if lambda=1
sca f_upper_remove_welfare=round(f_upper_remove_welfare,0.001)
di f_upper_remove_welfare 
egen p97_5_welfare_pct  = pctile(b_f_remove_welfare), p(97.5)
su p97_5_welfare_pct 
sca p97_5_welfare_unrounded=r(mean)
sca p97_5_welfare=round(p97_5_welfare_unrounded,0.001)
di p97_5_welfare

sca f_lower_add_welfare=round(f_lower_add_welfare,0.001)
di f_lower_add_welfare 
egen p2_5_welfare_pct= pctile(b_f_add_welfare), p(2.5)
su p2_5_welfare_pct 
sca p2_5_welfare_unrounded=r(mean)
sca p2_5_welfare=round(p2_5_welfare_unrounded,0.001)
di p2_5_welfare

//Calculate confidence intervals for fixed cost bounds if lambda=0
di f_upper_remove_industry 
egen p97_5_industry_pct= pctile(b_f_remove_industry), p(97.5)
su p97_5_industry_pct 
sca p97_5_industry_unrounded=r(mean)
sca p97_5_industry=round(p97_5_industry_unrounded,0.001)

di f_lower_add_industry
sca f_lower_add_industry=round(f_lower_add_industry,0.001)
egen p2_5_industry_pct= pctile(b_f_add_industry), p(2.5)
su p2_5_industry_pct 
sca p2_5_industry_unrounded=r(mean)
sca p2_5_industry=round(p2_5_industry_unrounded,0.001)

*Table A2 Row 1
di f_lower_add_industry " & " f_upper_remove_industry " & " p2_5_industry " &  " p97_5_industry
*Table A2 Row 2 (t_d=75)
di f_lower_add_welfare " & " f_upper_remove_welfare " & " p2_5_welfare " &  " p97_5_welfare 
restore
